Generate Passport Index datasets

Data by Passport Index 2020: https://www.passportindex.org/
In both tidy and matrix formats
Using ISO-2, ISO-3, and full country names



In [1]:

    
from bs4 import BeautifulSoup
import requests
import js2py
import pandas as pd
from itertools import permutations

Read PassportIndex page

Use Beautiful Soup to extract the `<script>` block whose JavaScript arrays hold the visa-free, visa-on-arrival, and eTA data.



In [2]:

    
# Download the comparison page whose inline JavaScript holds the visa data.
# A timeout keeps the cell from hanging indefinitely, and raise_for_status()
# fails here on an HTTP error instead of letting an error page reach the parser.
response = requests.get('https://www.passportindex.org/comparebyPassport.php', timeout=30)
response.raise_for_status()
source = response.content.decode('utf-8')



In [3]:

    
# Name the parser explicitly so the result does not depend on which optional
# parsers (lxml, html5lib) happen to be installed, and to avoid bs4's warning.
soup = BeautifulSoup(source, 'html.parser')

# JavaScript appended after the page's own script. For every key of com_c_vf
# it returns [key, value] pairs taken from, in this order:
#   [0] com_c_vf, [1] com_c_voa, [2] com_c_eta, [3] so_vf
js_export = (
    '; return ['
    'Object.keys(com_c_vf).map(function(z){return [z, com_c_vf[z]]}),'
    'Object.keys(com_c_vf).map(function(z){return [z, com_c_voa[z]]}),'
    'Object.keys(com_c_vf).map(function(z){return [z, com_c_eta[z]]}),'
    'Object.keys(com_c_vf).map(function(z){return [z, so_vf[z]]}),'
    ']};a()'
)

# Wrap the matching <script> body in a function `a` and call it.
# If several tags mention com_c_vf, the last one wins (same as before).
script = None
for s in soup.find_all('script'):
    if 'com_c_vf' in s.text:
        script = 'function a() {' + s.text + js_export

# Fail with a clear message instead of a NameError (or a stale `script`
# left over from an earlier run) when the page layout has changed.
if script is None:
    raise ValueError("No <script> tag containing 'com_c_vf' was found on the page")

data = js2py.eval_js(script)



In [4]:

    
# Lookup from the codes found in the page's `so_vf` array to the value written
# to the output for visa-free destinations. Numeric values are used as the
# number of days of stay; 'VF' and '-' are written as-is
# (presumably "visa free, no stated length" and "unknown" -- TODO confirm).
# NOTE(review): codes '10' and '11' have no entry here; confirm the site never emits them.
so = {
    '0': 'VF',
    '1': 7,
    '2': 14,
    '3': 90,
    '4': 28,
    '5': 30,
    '6': 180,
    '7': 360,
    '8': 31,
    '9': '-',
    '12': 60,
    '13': 15,
    '14': 120,
    '15': 240,
    '16': 45,  # key was '16:' (stray colon), which made code 16 raise KeyError
    '17': 21,
    '18': 42,
}

Begin data processing



In [5]:

    
# Country lookup table, read entirely as strings and indexed by ISO-2 code.
# read_csv treats the literal text "NA" as missing by default, so fillna('NA')
# puts it back (presumably for Namibia's ISO-2 code -- TODO confirm). Note it
# also fills any other empty cell with 'NA'.
url = 'https://gist.githubusercontent.com/ilyankou/b2580c632bdea4af2309dcaa69860013/raw/420fb417bcd17d833156efdf64ce8a1c3ceb2691/country-codes'
codes = (
    pd.read_csv(url, dtype=str)
    .fillna('NA')
    .set_index('ISO2')
)

def fix_iso2(x):
    """Map a two-letter code used by the site to the code used in `codes`.

    'UK' becomes 'GB' and 'RK' becomes 'XK' (the UK and Kosovo corrections);
    every other value is returned unchanged.
    """
    return {'UK': 'GB', 'RK': 'XK'}.get(x, x)



In [6]:

    
# One row for every ordered (Passport, Destination) pair of distinct countries.
multiindex = pd.MultiIndex.from_tuples(
    list(permutations(codes.index, 2)), names=['Passport', 'Destination']
)

# By default every pair requires a visa, so start with 'VR' everywhere;
# the loop below overwrites the pairs that have easier access.
tidy_iso2 = pd.DataFrame(index=multiindex)
tidy_iso2['Code'] = 'VR'

# Layout of `data`, as built by the scraping cell:
#   i=0 visa free (com_c_vf)        -> stay length looked up in `so`
#   i=1 visa on arrival (com_c_voa) -> 'VOA'
#   i=2 eta (com_c_eta)             -> 'ETA'
#   data[3] holds the stay-length codes (so_vf) for the visa-free lists
for i in range(3):
    for j in range(len(data[i])):
        passport = fix_iso2(data[i][j][0])  # correct UK and Kosovo codes
        countries = data[i][j][1].split(',')

        # For visa free we record the number of days. data[3] is built from
        # the same keys as data[0], so row j refers to the same passport.
        if i == 0:
            vf2days = data[3][j][1].split(',')

        for k in range(len(countries)):
            country = countries[k]

            # An empty list splits into [''], and stray commas give '' too
            if country == '':
                continue

            country = fix_iso2(country)  # correct UK and Kosovo codes

            # The labels for i=1 and i=2 used to be swapped ('ETA' for the
            # com_c_voa list, 'VOA' for the com_c_eta list).
            if i == 0:
                code = so[vf2days[k]]
            elif i == 1:
                code = 'VOA'
            else:
                code = 'ETA'

            tidy_iso2.loc[(passport, country), 'Code'] = code

Save ISO-2 files, both matrix and tidy



In [7]:

    
# Tidy (long) format: one row per (Passport, Destination) pair.
tidy_iso2.to_csv('passport-index-tidy-iso2.csv')

# Matrix (wide) format: passports as rows, destinations as columns.
# Cells with no entry in the tidy frame come out of the pivot as NaN
# and are written as -1.
matrix_iso2 = tidy_iso2.reset_index().pivot(
    index='Passport', columns='Destination', values='Code'
)
matrix_iso2.fillna(-1).to_csv('passport-index-matrix-iso2.csv')

Translate ISO-2 into ISO-3 and save both datasets



In [8]:

    
# Work on an independent copy, with Passport/Destination as ordinary columns.
tidy_iso3 = tidy_iso2.copy(deep=True).reset_index()

# Replace each ISO-2 code with the ISO3 value from the `codes` lookup table.
for column in ('Passport', 'Destination'):
    tidy_iso3[column] = tidy_iso3[column].apply(lambda iso2: codes.loc[iso2, 'ISO3'])

# Tidy file first, then the Passport x Destination matrix (missing cells -> -1).
tidy_iso3.to_csv('passport-index-tidy-iso3.csv', index=False)
matrix_iso3 = tidy_iso3.pivot(index='Passport', columns='Destination', values='Code')
matrix_iso3.fillna(-1).to_csv('passport-index-matrix-iso3.csv')

Translate ISO-2 into full country names and save both datasets



In [9]:

    
# Work on an independent copy, with Passport/Destination as ordinary columns.
tidy_names = tidy_iso2.copy(deep=True).reset_index()

# Replace each ISO-2 code with the Country value from the `codes` lookup table.
for column in ('Passport', 'Destination'):
    tidy_names[column] = tidy_names[column].apply(lambda iso2: codes.loc[iso2, 'Country'])

# Tidy file first, then the Passport x Destination matrix (missing cells -> -1).
tidy_names.to_csv('passport-index-tidy.csv', index=False)
matrix_names = tidy_names.pivot(index='Passport', columns='Destination', values='Code')
matrix_names.fillna(-1).to_csv('passport-index-matrix.csv')



In [ ]:



In [ ]: